##9/19/2015
##Replication_Code_8
## This script reproduces Fig 8: triple plot
# top one is "boxplot and outliers"
# middle is same boxplots, but on STANDARDIZED data
# bottom is declining number of outliers over time
# (code also spits the number of MPs above the 90th percentile)
rm(list=ls())
setwd("./../data/") # "C:/Users/as9934/Dropbox/HansardProject/burstiness/bjps_replication/data")

#load the panel data (only the Cs and Ls for this script)
load("paneldata.rdata")

#grab length of sessions
sessions <-read.csv("session_dates_all.csv")
starts <-  as.Date(as.character(sessions$begin), "%m/%d/%Y")
ends   <-  as.Date(as.character(sessions$ended), "%m/%d/%Y")
slengths <- ends - starts





#function to find governing party in that session
gov.party <- function(x=subs){
  govp <- names(which.max(table(subs$party[subs$cabinet==1])))
  oppp <- names(table(subs$party))[names(table(subs$party))!=govp]
  c(govp,oppp)
}

#get unique session info

sup <-   sort(unique(panel.data$session))

#record size of oppn
size.opp <- c()


mean.opp.b <- c() #mean oppn burstiness
std.opp.b <- c() #stdev of oppn burstiness
box.data <- list() #boxplot data

#cols for later
cols <- c()

for(i in 1:length(sup)){
  
  subs <- subset(panel.data, panel.data$session==sup[i])
  
  #define gov party, and oppn party
  governing.party <-  gov.party(subs)[1]
  oppn.party <- gov.party(subs)[-c(1)]
  
  
  #look only at Libs and Tories
  if("C"%in%oppn.party){oppn.party2 <- "C"; cols<-c(cols,"lightsteelblue1")}else{
    oppn.party2 <- "L"; cols<-c(cols,"yellow")}
  
  #record size of oppn caucus
  size.opp <- c(size.opp, sum(subs$party==oppn.party2) )
  
  oppn.burstiness <- subs$burstiness.actual[subs$party%in%oppn.party2]
  mean.opp.b <- c(mean.opp.b, mean(oppn.burstiness))
  std.opp.b  <- c(std.opp.b ,sd(oppn.burstiness) )
  box.data[[i]] <- oppn.burstiness
}


slist <- as.character(sup)
slist[length(slist)] <- "1910"
labs <-  gsub("_.*","",slist)

uni.sess <-  unique(gsub("_.*","",slist))
m <- match(uni.sess, labs)
labs2 <- rep(NA, length(labs) )
labs2[m] <- uni.sess


par(mfrow=c(3,1))
par(mar=c(3,4,2,1))
par(bg='cornsilk1')

#have liberal outliers as triangles, tories as circles
pchs <- cols
pchs[cols=="lightsteelblue1"] <- 21
pchs[cols=="yellow"] <- 22
pchs <- as.numeric(pchs)

require(scales)

#rescale boxplots
ress <-function (x, to = c(0, 1), from = range(x, na.rm = TRUE)) 
{
  if (zero_range(from) || zero_range(to)) 
    return(rep(mean(to), length(x)))
  (x - from[1])/diff(from) * diff(to) + to[1]
}

res.box <- lapply(box.data, ress)

###############################################################
## FIRST PLOT: boxplots                                     ###
###############################################################

#raw data
boxplot(box.data,   outpch = pchs,outcol="black",outbg = cols, axes=F, outcex=1.5, ylab="")
axis(1, at=1:length(sup), labels=labs2)
axis(2)
box()

#show that mean + sd increased
require(strucchange)
bpm <- breakpoints(mean.opp.b~1) #one bp around 1880
bps <- breakpoints(std.opp.b~1) #one bp around 1880

abline(v=bpm$breakpoints, lty=3)
legend("topleft",pch=c(21,22), legend=c("Conservative","Liberal"), col=c("black","black"),
       pt.bg=c("lightsteelblue1","yellow"), pt.cex=c(1.6,1.6))

#put values on plot
meanfirst <- mean(mean.opp.b[1:bpm$breakpoints[1]])
meansecond <-  mean(mean.opp.b[(bpm$breakpoints[1]+1):length(sup)   ])

text(27,1e09,paste("mean=",round(meanfirst, d=2),sep=""))
text(27, .8e09,paste("sd=",round(mean(std.opp.b[1:bpm$breakpoints[1]]), d=2),sep=""))

text(72,1.5e09,paste("mean=",round(meansecond, d=2),sep=""))
text(72,1.3e09,paste("sd=",round(mean(std.opp.b[(bpm$breakpoints[1]+1):length(sup)]
), d=2),sep=""))

###############################################################
## SECOND PLOT: standardized - medians and 90th percentiles ###
###############################################################


meds <- c()
p90s <- c()
p99s <- c()
cols <- c()

for(i in 1:length(sup)){
  subs <- subset(panel.data, panel.data$session==sup[i])
  
  #define gov party
  governing.party <-  gov.party(subs)[1]
  oppn.party <- gov.party(subs)[-c(1)]
  
  #get oppn
  subs.opp <- subs[subs$party%in%oppn.party,]
  
  rb <- rescale(subs.opp$burstiness.actual, to=c(0, 1))
  
  
  meds <- c(meds, quantile(rb, .5))
  p90s <- c(p90s, quantile(rb, .90))
  
  cat("\n there were ",length(which(rb>=quantile(rb,.90))),"MPs above the 90th percentile\n")
  
  #look only at Libs and Tories
  if("C"%in%oppn.party){oppn.party2 <- "C"; cols<-c(cols,"lightsteelblue1")}else{
    oppn.party2 <- "L"; cols<-c(cols,"yellow")}
  
}

par(las=2)
par(bg='cornsilk1')

plot(1:length(sup), meds, ylim=c(0,.4), xlab="", type="p", pch="_", axes=F, ylab="std burstiness")
axis(1, at=1:length(sup), labels=labs2)
axis(2)
box()


#have liberal high percentiles as triangles, tories as circles
pchs <- cols
pchs[cols=="lightsteelblue1"] <- 21
pchs[cols=="yellow"] <- 22
pchs <- as.numeric(pchs)

points(1:length(sup), p90s, pch=pchs, bg=cols, col='black', cex=1.6)

loww <- lowess(p90s~1:length(sup))

lines(loww$x, loww$y,col="red", lwd=2)

legend("topleft",pch=c(21,22), legend=c("Conservative","Liberal"), col=c("black","black"),
       pt.bg=c("lightsteelblue1","yellow"), pt.cex=c(1.6,1.6))



######################################
### THIRD PLOT ######################
######################################

#look at number of outliers as marker of hierarchy: more outliers => more 
#concentrated power structure
liers <- c()
for(j in 1:length(box.data)){
  boxy <- boxplot(box.data[[j]], plot=F)
  liers <- c(liers, length(boxy$out))
}      

#divide by share of oppn? (size.opp)
share <- liers

plot(1:length(sup), share, axes=F, ylab="outliers", col="black", bg=cols, pch=pchs,
     cex=1.6, xlab="")
axis(1, at=1:length(sup), labels=labs2)
axis(2)
low <- lowess(share~ 1:length(sup))
lines(low$x, low$y, lwd=2, col="red")
box()

legend("bottomleft",pch=c(21,22), legend=c("Conservative","Liberal"), col=c("black","black"),
       pt.bg=c("lightsteelblue1","yellow"), pt.cex=c(1.6,1.6))


sbps <- breakpoints(share~1)
abline(v=sbps$breakpoints, lty=3)
meanfirst <- mean(share[1:sbps$breakpoints[1]])
meansecond <-  mean(share[(sbps$breakpoints[1]+1):sbps$breakpoints[2]])
meanthird <- mean(share[(sbps$breakpoints[2]+1):length(share)])


text(21,89,paste("mean=",round(meanfirst, d=2),sep=""))
text(51,89,paste("mean=",round(meansecond, d=2),sep=""))
text(72,89,paste("mean=",round(meanthird, d=2),sep=""))
